#title: U.S. Military Training, External Support, and Security Defections in NMs 
#authors: Ilker Kalin
#date: "11/02/2024"
#output:
  
### Load necessary R packages 
  
library(data.table)  
library(tidyr) 
library(psych)
library(ggpubr)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(stargazer)
library(effects)
library(margins)
library(sandwich)
library(lmtest)
library(AER)
library(mfx)
library(readxl)
library(ResourceSelection)
library(stringr)
library(reshape)
library(reshape2)
library(tibble)
library(multiwayvcov)
library(ggeffects)
library(sjPlot)
library(sjmisc)
library(logistf)

version # version check

### Load primary dataset

data1 <- read_csv ("mydata1.csv") 

data1 <- data1[,-1]

## Get the number of campaigns referred in the main text (in Research Design & Methodology)

df_uniq <- unique(data1$id)
df_uniq
length(df_uniq)

names(data1)

sapply(data1, function(x) sum(is.na(x)))

### Operationalization ###

## DV: security defection (sec_defect) ##

data1$sec_defect <- as.factor(data1$sec_defect)

## IVs ##

# international_media (in_media)
summary(data1$in_media)

data1$in_media_dum <- ifelse(data1$in_media == 2, 1, 0)

# international sanction (sdirect)
summary(data1$sdirect)

# Western campaign support (West_camp_sup) - without US
table(data1$West_camp_sup)

data1$West_camp_sup_dum <- ifelse(data1$West_camp_sup >= 1, 1, 0)

# US support for campaing (US_camp_sup)
describe(data1$US_camp_sup)

# Western support with *US included* (Western_sup)
data1$Western_sup <- rowSums(data1[,c("US_camp_sup", "West_camp_sup")], na.rm=TRUE)
table(data1$Western_sup)
data1$Western_sup_dum <- ifelse(data1$Western_sup >= 1, 1, 0)

# setting dummy variables as factors 

data1$US_camp_sup <- as.factor(data1$US_camp_sup)

data1$West_camp_sup_dum <- as.factor(data1$West_camp_sup_dum)

data1$Western_sup_dum <- as.factor(data1$Western_sup_dum)

## US_WEST category ##

data1$US_WEST <- as.factor(ifelse(data1$US_camp_sup ==0 & data1$West_camp_sup_dum ==0, '0',
                                  ifelse(data1$US_camp_sup ==1 & data1$West_camp_sup_dum ==0, '1',
                                         ifelse(data1$US_camp_sup ==1 & data1$West_camp_sup_dum ==1, '2', '0'))))


data1$US_WEST3 <- as.factor(ifelse(data1$US_camp_sup ==0 & data1$West_camp_sup_dum ==0, '0',
                                 ifelse(data1$US_camp_sup ==0 & data1$West_camp_sup_dum ==1, '1',
                                        ifelse(data1$US_camp_sup ==1 & data1$West_camp_sup_dum ==0, '2', 
                                             ifelse(data1$US_camp_sup ==1 & data1$West_camp_sup_dum ==1,  '3', '0')))))

table(data1$US_WEST3)

# US-training variables #

summary(data1$totalstudents_5)

summary(data1$ttstudents)

data1$ttstudents_log <- log(data1$ttstudents+1) #iv

data1$totalstudents_log <- log(data1$totalstudents+1) #iv

data1$totalstudents_5log <- log(data1$totalstudents_5+1) 

data1$totalstudents_10log <- log(data1$totalstudents_10+1) 

data1$totalstudents_15log <- log(data1$totalstudents_15+1) 

data1$totalstudents_20log <- log(data1$totalstudents_20+1) 

## CVs ## 

table(data1$regime_change) # regime change
data1$regime_change <- as.factor(data1$regime_change)

describe(data1$camp_size) # campaign size 

describe(data1$repression) # repression 

describe(data1$diversity) # diversity 

describe(data1$lag.regime) # lagged democracy score
data1$democ_dum <- as.factor(ifelse(data1$lag.regime >= 6, 1, 0))
table(data1$democ_dum)

describe(data1$lag.lngdp) # lagged GDP

describe(data1$reg_sup) # external state support for the regime 
data1$reg_sup <- as.factor(data1$reg_sup)

### Analysis ### 

# main models # in Stata 



### APPENDIX ### 

#Table1a# 

summary(data1)

#Table2a#

model1 <- glm(sec_defect ~ totalstudents_log + Western_sup_dum + camp_size +
                regime_change + repression + diversity + democ_dum + lag.lngdp + 
                reg_sup + sdirect + in_media_dum, family=binomial(link="logit"), data=data1)

summary(model1)

coeftest(model1, vcov = vcovCL, cluster = ~ ccode)


model2 <- glm(sec_defect ~ totalstudents_log + US_WEST + camp_size + 
                regime_change + repression + diversity + democ_dum + lag.lngdp + 
                reg_sup + sdirect + in_media_dum, family=binomial(link="logit"), data=data1)

summary(model2)

coeftest(model2, vcov = vcovCL, cluster = ~ ccode)


#table3a# with region fixed effect in Stata

#table4a# with year fixed effect in Stata 

#table5a# with alternative measurements of US-training in Stata 

#table6a# without repression

model1 <- glm(sec_defect ~ totalstudents_log*Western_sup_dum + camp_size +
                regime_change + diversity + democ_dum + lag.lngdp + 
                reg_sup + sdirect + in_media_dum, family=binomial(link="logit"), data=data1)

coeftest(model1, vcov = vcovCL, cluster = ~ ccode)



model2 <- glm(sec_defect ~ totalstudents_log*US_WEST + camp_size +
                regime_change + diversity + democ_dum + lag.lngdp + 
                reg_sup + sdirect + in_media_dum, family=binomial(link="logit"), data=data1)

coeftest(model2, vcov = vcovCL, cluster = ~ ccode)

#table7a# OLS regression in Stata

#table8a# rare event regression 

library(logistf)

model1a <- logistf(sec_defect ~ totalstudents_log*Western_sup_dum + camp_size +
                     regime_change + repression + diversity + democ_dum + lag.lngdp + 
                     reg_sup + sdirect + in_media_dum, data = data1, firth = TRUE)

model2a <- logistf(sec_defect ~ totalstudents_log*US_WEST + camp_size +
                     regime_change + repression + diversity + democ_dum + lag.lngdp + 
                     reg_sup + sdirect + in_media_dum, data = data1, firth = TRUE)


summary(model1a)
summary(model2a)

#table9a# adding US_WEST3 variable (no US support, but Western country sup) - 9 cases

table(data1$US_WEST3)

modelx <- glm(sec_defect ~ totalstudents_log*US_WEST3 + camp_size +
                regime_change + repression + diversity + democ_dum + lag.lngdp + 
                reg_sup + sdirect + in_media_dum, family=binomial(link="logit"), data=data1)

coeftest(modelx, vcov = vcovCL, cluster = ~ ccode)


# Diagnostics # 

# Hosmer-Lemeshow # Table 1, Models 1 and 2

h1 <- hoslem.test(model1$y, fitted(model1), g=10)
h1

h2 <- hoslem.test(model2$y, fitted(model2), g=10)
h2

# Collinearity Test # Table2a, Model1
car::vif(model1) 
car::vif(model2) 

